[IOMMU] clean interrupt remapping and queued invalidation
authorKeir Fraser <keir.fraser@citrix.com>
Mon, 19 Oct 2009 09:54:35 +0000 (10:54 +0100)
committerKeir Fraser <keir.fraser@citrix.com>
Mon, 19 Oct 2009 09:54:35 +0000 (10:54 +0100)
This patch enlarges the interrupt remapping table to fix out-of-range
table accesses that occur when using many multiple-function PCI devices.
The invalidation queue is also expanded accordingly.

Signed-Off-By: Zhai Edwin <edwin.zhai@intel.com>
Signed-Off-By: Cui Dexuan <dexuan.cui@intel.com>
xen/drivers/passthrough/vtd/intremap.c
xen/drivers/passthrough/vtd/iommu.c
xen/drivers/passthrough/vtd/iommu.h
xen/drivers/passthrough/vtd/qinval.c
xen/drivers/passthrough/vtd/utils.c

index 24e22be320cbdc6dd93d3e6b69267cbb68ff3ac2..595afbaa1e50689661f49bafb8108d993ba6c603 100644 (file)
@@ -146,6 +146,7 @@ static int remap_entry_to_ioapic_rte(
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    u64 entry_base;
 
     if ( ir_ctrl == NULL )
     {
@@ -164,9 +165,11 @@ static int remap_entry_to_ioapic_rte(
 
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
+    entry_base = ir_ctrl->iremap_maddr +
+                 (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
     iremap_entries =
-        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
-    iremap_entry = &iremap_entries[index];
+        (struct iremap_entry *)map_vtd_domain_page(entry_base);
+    iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
 
     old_rte->vector = iremap_entry->lo.vector;
     old_rte->delivery_mode = iremap_entry->lo.dlm;
@@ -192,6 +195,7 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu,
     int index;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    u64 entry_base;
 
     remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
@@ -208,15 +212,17 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu,
     {
         dprintk(XENLOG_ERR VTDPREFIX,
                 "%s: intremap index (%d) is larger than"
-                " the maximum index (%ld)!\n",
+                " the maximum index (%d)!\n",
                 __func__, index, IREMAP_ENTRY_NR - 1);
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return -EFAULT;
     }
 
+    entry_base = ir_ctrl->iremap_maddr +
+                 (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
     iremap_entries =
-        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
-    iremap_entry = &iremap_entries[index];
+        (struct iremap_entry *)map_vtd_domain_page(entry_base);
+    iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
 
     memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
 
@@ -425,6 +431,7 @@ static int remap_entry_to_msi_msg(
     int index;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    u64 entry_base;
 
     if ( ir_ctrl == NULL )
     {
@@ -447,9 +454,11 @@ static int remap_entry_to_msi_msg(
 
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
 
+    entry_base = ir_ctrl->iremap_maddr +
+                 (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
     iremap_entries =
-        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
-    iremap_entry = &iremap_entries[index];
+        (struct iremap_entry *)map_vtd_domain_page(entry_base);
+    iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
 
     msg->address_hi = MSI_ADDR_BASE_HI;
     msg->address_lo =
@@ -485,6 +494,7 @@ static int msi_msg_to_remap_entry(
     int index;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    u64 entry_base;
 
     remap_rte = (struct msi_msg_remap_entry *) msg;
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
@@ -502,16 +512,18 @@ static int msi_msg_to_remap_entry(
     {
         dprintk(XENLOG_ERR VTDPREFIX,
                 "%s: intremap index (%d) is larger than"
-                " the maximum index (%ld)!\n",
+                " the maximum index (%d)!\n",
                 __func__, index, IREMAP_ENTRY_NR - 1);
         msi_desc->remap_index = -1;
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return -EFAULT;
     }
 
+    entry_base = ir_ctrl->iremap_maddr +
+                 (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
     iremap_entries =
-        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
-    iremap_entry = &iremap_entries[index];
+        (struct iremap_entry *)map_vtd_domain_page(entry_base);
+    iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
     memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
 
     /* Set interrupt remapping table entry */
@@ -619,7 +631,7 @@ int enable_intremap(struct iommu *iommu)
     if ( ir_ctrl->iremap_maddr == 0 )
     {
         drhd = iommu_to_drhd(iommu);
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, 1);
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR );
         if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
index 4dabe6f4ecf960db141384daba70388c115c71a6..af611a9d40c076278ccfd39e2ff69790af318590 100644 (file)
@@ -135,16 +135,16 @@ void iommu_flush_cache_entry(void *addr)
 
 void iommu_flush_cache_page(void *addr, unsigned long npages)
 {
-    __iommu_flush_cache(addr, PAGE_SIZE_4K * npages);
+    __iommu_flush_cache(addr, PAGE_SIZE * npages);
 }
 
 /* Allocate page table, return its machine address */
 u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
 {
     struct acpi_rhsa_unit *rhsa;
-    struct page_info *pg;
+    struct page_info *pg, *cur_pg;
     u64 *vaddr;
-    int node = -1;
+    int node = -1, i;
 
     rhsa = drhd_to_rhsa(drhd);
     if ( rhsa )
@@ -154,11 +154,17 @@ u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
                              (node == -1 ) ? 0 : MEMF_node(node));
     if ( !pg )
         return 0;
-    vaddr = __map_domain_page(pg);
-    memset(vaddr, 0, PAGE_SIZE * npages);
 
-    iommu_flush_cache_page(vaddr, npages);
-    unmap_domain_page(vaddr);
+    cur_pg = pg;
+    for ( i = 0; i < npages; i++ )
+    {
+        vaddr = __map_domain_page(cur_pg);
+        memset(vaddr, 0, PAGE_SIZE);
+
+        iommu_flush_cache_page(vaddr, 1);
+        unmap_domain_page(vaddr);
+        cur_pg++;
+    }
 
     return page_to_maddr(pg);
 }
index 2629bbd9ecb91a62ad686d9b15a9a624029fc84c..78b46271e65d80021dd9c2ca3eabb227599024be 100644 (file)
@@ -302,7 +302,23 @@ struct iremap_entry {
     }hi;
   };
 };
-#define IREMAP_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct iremap_entry))
+
+/* Max intr remapping table page order is 8, as max number of IRTEs is 64K */
+#define IREMAP_PAGE_ORDER  8
+
+/*
+ * VTd engine handles 4K page, while CPU may have different page size on
+ * different arch. E.g. 16K on IPF.
+ */
+#define IREMAP_ARCH_PAGE_ORDER  (IREMAP_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT)
+#define IREMAP_ARCH_PAGE_NR     ( IREMAP_ARCH_PAGE_ORDER < 0 ?  \
+                                1 :                             \
+                                1 << IREMAP_ARCH_PAGE_ORDER )
+
+/* Each entry is 16 bytes, so 2^8 entries per 4K page */
+#define IREMAP_ENTRY_ORDER  ( PAGE_SHIFT - 4 )
+#define IREMAP_ENTRY_NR     ( 1 << ( IREMAP_PAGE_ORDER + 8 ) )
+
 #define iremap_present(v) ((v).lo & 1)
 #define iremap_fault_disable(v) (((v).lo >> 1) & 1)
 
@@ -392,12 +408,17 @@ struct qinval_entry {
     }q;
 };
 
-/* Order of queue invalidation pages */
-#define IQA_REG_QS       0
-#define NUM_QINVAL_PAGES (1 << IQA_REG_QS)
+/* Order of queue invalidation pages(max is 8) */
+#define QINVAL_PAGE_ORDER   2
 
-/* Each entry is 16 byte */
-#define QINVAL_ENTRY_NR  (1 << (IQA_REG_QS + 8))
+#define QINVAL_ARCH_PAGE_ORDER  (QINVAL_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT)
+#define QINVAL_ARCH_PAGE_NR     ( QINVAL_ARCH_PAGE_ORDER < 0 ?  \
+                                1 :                             \
+                                1 << QINVAL_ARCH_PAGE_ORDER )
+
+/* Each entry is 16 bytes, so 2^8 entries per page */
+#define QINVAL_ENTRY_ORDER  ( PAGE_SHIFT - 4 )
+#define QINVAL_ENTRY_NR     (1 << (QINVAL_PAGE_ORDER + 8))
 
 /* Status data flag */
 #define QINVAL_STAT_INIT  0
@@ -429,9 +450,9 @@ struct qinval_entry {
 #define IEC_GLOBAL_INVL         0
 #define IEC_INDEX_INVL          1
 #define IRTA_REG_EIME_SHIFT     11
-#define IRTA_REG_TABLE_SIZE     7    // 4k page = 256 * 16 byte entries
-                                     // 2^^(IRTA_REG_TABLE_SIZE + 1) = 256
-                                     // IRTA_REG_TABLE_SIZE = 7
+
+/* 2^(IRTA_REG_TABLE_SIZE + 1) = IREMAP_ENTRY_NR */
+#define IRTA_REG_TABLE_SIZE     ( IREMAP_PAGE_ORDER + 7 )
 
 #define VTD_PAGE_TABLE_LEVEL_3  3
 #define VTD_PAGE_TABLE_LEVEL_4  4
index fe24863f966b4737860ead17d06ec8772c52947e..7dde42b302ccfacd53b0274489c1fb67773c4efc 100644 (file)
@@ -45,17 +45,15 @@ static void print_qi_regs(struct iommu *iommu)
 
 static int qinval_next_index(struct iommu *iommu)
 {
-    u64 tail, head;
+    u64 tail;
 
     tail = dmar_readq(iommu->reg, DMAR_IQT_REG);
     tail >>= QINVAL_INDEX_SHIFT;
 
-    head = dmar_readq(iommu->reg, DMAR_IQH_REG);
-    head >>= QINVAL_INDEX_SHIFT;
-
-    /* round wrap check */
-    if ( ( tail + 1 ) % QINVAL_ENTRY_NR == head  )
-        return -1;
+    /* (tail+1 == head) indicates a full queue, wait for HW */
+    while ( ( tail + 1 ) % QINVAL_ENTRY_NR ==
+            ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
+        cpu_relax();
 
     return tail;
 }
@@ -77,11 +75,13 @@ static int gen_cc_inv_dsc(struct iommu *iommu, int index,
     unsigned long flags;
     struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+    u64 entry_base = qi_ctrl->qinval_maddr +
+                 (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
 
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
     qinval_entries =
-        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
-    qinval_entry = &qinval_entries[index];
+        (struct qinval_entry *)map_vtd_domain_page(entry_base);
+    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
     qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
     qinval_entry->q.cc_inv_dsc.lo.granu = granu;
     qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
@@ -121,14 +121,14 @@ static int gen_iotlb_inv_dsc(struct iommu *iommu, int index,
     unsigned long flags;
     struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+    u64 entry_base = qi_ctrl->qinval_maddr +
+                 (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
 
-    if ( index == -1 )
-        return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
     qinval_entries =
-        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
-    qinval_entry = &qinval_entries[index];
+        (struct qinval_entry *)map_vtd_domain_page(entry_base);
+    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
     qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
     qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
     qinval_entry->q.iotlb_inv_dsc.lo.dr = dr;
@@ -172,13 +172,13 @@ static int gen_wait_dsc(struct iommu *iommu, int index,
     unsigned long flags;
     struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+    u64 entry_base = qi_ctrl->qinval_maddr +
+                 (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
 
-    if ( index == -1 )
-        return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
     qinval_entries =
-        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
-    qinval_entry = &qinval_entries[index];
+        (struct qinval_entry *)map_vtd_domain_page(entry_base);
+    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
     qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
     qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
     qinval_entry->q.inv_wait_dsc.lo.sw = sw;
@@ -247,14 +247,14 @@ static int gen_dev_iotlb_inv_dsc(struct iommu *iommu, int index,
     unsigned long flags;
     struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+    u64 entry_base = qi_ctrl->qinval_maddr +
+                 (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
 
-    if ( index == -1 )
-        return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
     qinval_entries =
-        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
-    qinval_entry = &qinval_entries[index];
+        (struct qinval_entry *)map_vtd_domain_page(entry_base);
+    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
     qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
     qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
@@ -295,14 +295,14 @@ static int gen_iec_inv_dsc(struct iommu *iommu, int index,
     unsigned long flags;
     struct qinval_entry *qinval_entry = NULL, *qinval_entries;
     struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+    u64 entry_base = qi_ctrl->qinval_maddr +
+                 (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
 
-    if ( index == -1 )
-        return -1;
     spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
 
     qinval_entries =
-        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
-    qinval_entry = &qinval_entries[index];
+        (struct qinval_entry *)map_vtd_domain_page(entry_base);
+    qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
     qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
     qinval_entry->q.iec_inv_dsc.lo.granu = granu;
     qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
@@ -445,7 +445,7 @@ int enable_qinval(struct iommu *iommu)
     if ( qi_ctrl->qinval_maddr == 0 )
     {
         drhd = iommu_to_drhd(iommu);
-        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, NUM_QINVAL_PAGES);
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, QINVAL_ARCH_PAGE_NR);
         if ( qi_ctrl->qinval_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
@@ -464,7 +464,7 @@ int enable_qinval(struct iommu *iommu)
      * registers are automatically reset to 0 with write
      * to IQA register.
      */
-    qi_ctrl->qinval_maddr |= IQA_REG_QS;
+    qi_ctrl->qinval_maddr |= QINVAL_PAGE_ORDER;
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
index 8edd102077053da37a0024058fb8053961543527..2a7798c09624ca955301e8d889c79958cc53af80 100644 (file)
@@ -226,8 +226,7 @@ static void dump_iommu_info(unsigned char key)
             /* Dump interrupt remapping table. */
             u64 iremap_maddr = dmar_readq(iommu->reg, DMAR_IRTA_REG);
             int nr_entry = 1 << ((iremap_maddr & 0xF) + 1);
-            struct iremap_entry *iremap_entries =
-                (struct iremap_entry *)map_vtd_domain_page(iremap_maddr);
+            struct iremap_entry *iremap_entries = NULL;
 
             printk("  Interrupt remapping table (nr_entry=0x%x. "
                 "Only dump P=1 entries here):\n", nr_entry);
@@ -235,7 +234,18 @@ static void dump_iommu_info(unsigned char key)
                    "FPD P\n");
             for ( i = 0; i < nr_entry; i++ )
             {
-                struct iremap_entry *p = iremap_entries + i;
+                struct iremap_entry *p;
+                if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
+                {
+                    /* This entry across page boundry */
+                    u64 entry_base = iremap_maddr +
+                        (( i >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
+                    if ( iremap_entries )
+                        unmap_vtd_domain_page(iremap_entries);
+                    iremap_entries =
+                        (struct iremap_entry *)map_vtd_domain_page(entry_base);
+                }
+                p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
 
                 if ( !p->lo.p )
                     continue;
@@ -246,8 +256,9 @@ static void dump_iommu_info(unsigned char key)
                     (u32)p->lo.dlm, (u32)p->lo.tm, (u32)p->lo.rh,
                     (u32)p->lo.dm, (u32)p->lo.fpd, (u32)p->lo.p);
             }
+            if ( iremap_entries )
+                unmap_vtd_domain_page(iremap_entries);
 
-            unmap_vtd_domain_page(iremap_entries);
         }
     }